;;; 
;;; disasm6502-oo.lisp
;;;
;;; Code to disassemble 6502 machine code.
;;;
;;; Based on disasm6502.lisp, try to make things more object
;;; oriented
;;;
;;; Joseph A. Oswald, III
;;; 30 October 2000
;;;
;;; $Version$
;;;
;;; $Log$
;;;

(defclass 6502-instruction ()
  ((opcode :initarg :opcode :accessor opcode)
   (bytes :initarg :bytes :accessor bytes 
:documentation "A list containing unsigned 8-bit bytes, or nil 
when the byte is unknown.")
   (mnemonic :initarg :mnemonic :accessor mnemonic)
   (address-mode :initarg :address-mode :accessor address-mode)
   (operand :initarg :operand :accessor operand)))

;;; for ordinary 6502-instructions,
;;; operand is simply the numeric argument, if it is known, or a string
;;; of the form "??" or "??xx" if it is not known.

;;; possibly add pseudo-ops, e.g. opcode NIL mnemonic :DFB address-mode :implied
;;; operand #x01 for a "define byte"

(defun disassemble-6502-instruction (code-array offset address)
  "Decodes a 6502 instruction at OFFSET in CODE-ARRAY, which is presumed
to correspond to a physical ADDRESS.

   Returns the corresponding 6502-instruction, 
           the offset of the following instruction, and
           the address of the following instruction (which may wrap around in the 64k
address space of the 6502."

  (let* ((bytes-available (- (length code-array) offset))
         (opcode (aref code-array offset))
         (mnemonic (6502-mnemonic opcode))
         (instruction-length (6502-instruction-length opcode))
         (full-instruction (<= instruction-length bytes-available))
         (disasm-length (min instruction-length bytes-available))
         (address-mode (6502-address-mode opcode))
         (bytes-present  (map 'list #'identity (subseq code-array offset 
                                                       (+ offset 
							  disasm-length))))
         (bytes (if full-instruction
                  bytes-present
                  (append bytes-present (make-list (- instruction-length 
						      disasm-length)))))
                             
         (arg-1 (if (>= disasm-length 2)
                  (aref code-array (+ offset 1))
                  "??"))
         (arg-2 (if (>= disasm-length 3)
                  (+ (aref code-array (+ offset 1))
                     (* 256 (aref code-array (+ offset 2))))
                  (format nil "??~A" arg-1))))
    
    (values
     (make-instance '6502-instruction
       :opcode opcode
       :bytes bytes
       :mnemonic mnemonic
       :address-mode address-mode
       :operand
       (ecase address-mode
         ((:implied :accumulator nil) nil)
         ((:immediate 
           :zero-page :zero-page-x :zero-page-y
           :zp-indirect-x :zp-indirect-y :branch-relative) arg-1)
         ((:absolute :absolute-x :absolute-y :indirect)
          arg-2)))
     
     (+ offset instruction-length)
     (mod (+ address instruction-length) #x10000))))

;;; OK. Simple DISASM. 
;;;
;;; Keeping track of branch, jump destinations and making up labels.
;;;

#|
;;; Brute force label table

(defvar *6502-label-table* (make-array  65536 :initial-element nil))
|#

#|
(defclass 6502-location ()
  ((byte-content :accessor byte-content :initarg :byte-content)
   (instruction-content :accessor instruction-content 
			:initarg :instruction-content)
   (label :accessor label :initarg :label)))
|#

(defun branch-destination (opcode-address branch-argument)
   (let ((branch-offset (if (<= branch-argument 127)
                          branch-argument
                          (- branch-argument 256))))
     (mod (+ opcode-address 2 branch-offset) #x10000)))

(defun address-reference (instruction opcode-address)
  (if (numberp (operand instruction))
    (ecase  (address-mode instruction)
      ((:implied :immediate :accumulator) nil)
      ((:zero-page :zero-page-x :zero-page-y :zp-indirect-x :zp-indirect-y
                   :absolute :absolute-x :absolute-y :indirect)
       (operand instruction))
      ((:branch-relative)
       (branch-destination opcode-address (operand instruction))))
    nil))

(defun make-label (address)
  (format nil (if (< address 256)
                "L~2,'0X"
                "L~4,'0X") address))

(defun find-or-intern-label (address-reference label-table)
  (let ((existing-reference (aref label-table address-reference)))
    (if existing-reference
      existing-reference
      (let ((new-label (make-label address-reference)))
        (setf (aref label-table address-reference) new-label)
        new-label))))

(defun textline-6502 (instruction inst-address label-table)

  (let ((ref (address-reference instruction inst-address))
        (operand (operand instruction)))

    (with-output-to-string (str)
      (format str "~4,'0X-   " inst-address)
      (format str "~{~:[??~;~:*~2,'0X~] ~}" (bytes instruction))
      ;; ~:[ false format ~; true format ~]
      ;; the ~:* is necessary to re-use the tested element of the list as the argument to ~X
      
      (format str "~18T~@[~A~]~26T" (aref label-table inst-address))
      
      (format str "~A " (mnemonic instruction))

      (let ((zero-page-argument
             (if ref
               (format nil "~@[~A~]" (aref label-table ref))
               (if (numberp operand)
                 (format nil (if (numberp operand)
                               "$~2,'0X"
                               "~A")
                         operand)
                 operand)))
            (full-argument
             (if ref
               (format nil "~@[~A~]" (aref label-table ref))
               (if (numberp operand)
                 (format nil (if (numberp operand)
                               "$~4,'0X"
                               "~A")
                         operand)
                 operand))))
             
                
        (ecase (address-mode instruction)
          ((:implied nil))
          (:accumulator "A")
          ((:immediate) (format str "#$~:[??~;~:*~2,'0X~]" 
				(operand instruction)))
          
          (:zero-page (format str "~A" zero-page-argument))
          ((:absolute :branch-relative) (format str "~A" full-argument))
          (:absolute-x (format str "~A,X" full-argument))
          (:absolute-y (format str "~A,Y" full-argument))

          (:indirect (format str "(~A)" full-argument))
          
          (:zero-page-x (format str "~A,X" zero-page-argument))
          (:zero-page-y (format str "~A,Y" zero-page-argument))
          (:zp-indirect-x (format str "(~A,X)" zero-page-argument))
          (:zp-indirect-y (format str "(~A),Y" zero-page-argument)))))))    

#|
;;; use disasm-apple instead
(defun symbol-6502-disasm (code-array section-list array-origin label-table
                                      &optional (output-stream t))

  "Prints a text form disassembly to OUTPUT-STREAM corresponding to 6502 
assembly code in CODE-ARRAY, the first element of which corresponds 
to memory location ARRAY-ORIGIN. 

  The code sections are listed in SECTION-LIST in the format 

   ((start-offset-1 end-offset-1) 
    (start-offset-2 end-offset-2) ...)
 
LABEL-TABLE is destructively updated to contain labels for
all referred addresses in the disassembly. If it contains non-nil entries, 
these will be used as label names."

  ;;; one pass to collect addresses referenced
  ;;; this pass should also keep the instructions in some data structure
  ;;; for the second pass to re-use. 

  (labels ((extract-jump-branch-labels (start-offset end-offset)
             (let ((start-address (+ array-origin start-offset)))
               (do ((pc start-address)
                    (offset start-offset))
                   ((>= offset end-offset) nil)
                 (multiple-value-bind (inst next-offset next-address)
                                      (disassemble-6502-instruction 
				       code-array offset pc)
                   (let ((addr (address-reference inst pc)))
                     (when addr
                       (find-or-intern-label addr label-table)))
                   (setf pc next-address)
                   (setf offset next-offset)))))

  ;;; the labels for self-modifying code will be messy---
  ;;;   the code above will create a "Lxxxx" label for the byte being modified,
  ;;;   but unless it is an opcode, the label will only be used in the argument. 
  ;;; ideally, these inter-instruction labels would be replaced
  ;;;
  ;;;  A9 00   SELFMOD LDA #$00
  ;;;                  JSR COUT
  ;;;                  LDA #$01
  ;;;                  STA SELFMOD+1  (instead of STA Lxxxx)

  ;;; these label changes would have to be made by scanning a record, 
  ;;; from the first pass, of the locations where instructions begin. 

  ;;; for now, the user will have to put them in by hand.

  ;;; another pass to print with labeled references

           (disasm-code-section (start-offset end-offset)
             (let ((start-address (+ array-origin start-offset)))

               (do ((pc start-address)
                    (offset start-offset))
                   ((>= offset end-offset) (values pc offset))
                 (multiple-value-bind (inst next-offset next-address)
                                      (disassemble-6502-instruction 
				       code-array offset pc)
                   
                   (format output-stream "~A~%" (textline-6502 
						 inst pc label-table))
                   
                   (setf pc next-address)
                   (setf offset next-offset))))))

    (mapc #'(lambda (offset-pair) 
                (funcall #'extract-jump-branch-labels 
			 (car offset-pair) (second offset-pair)))
          section-list)
    (mapc #'(lambda (offset-pair)
              (funcall #'disasm-code-section (car offset-pair) 
		       (second offset-pair))
              (terpri output-stream))
          section-list)))

|#
  
#|

;;; OLD TEST CASES...use disasm-apple instead
;;;
(setf *6502-label-table* (make-array  65536 :initial-element nil)
      (aref *6502-label-table* #x800) "START"
      (aref *6502-label-table* #xfded) "COUT")

(symbol-6502-disasm #(#xa9 65 #x20 #xed #xfd #x60) 0 6 #x800 *6502-label-table*)

0800-   A9 41     START   LDA #$41
0802-   20 ED FD          JSR COUT
0805-   60                RTS 

returns 2054, 6 (i.e. #x806, 6)

(symbol-6502-disasm #(#xa9 65 #x20 #xed #xfd #xd0 #xfb #x60 #x99) 0 9 #x800 *6502-label-table*)

0800-   A9 41     START   LDA #$41
0802-   20 ED FD  L0802   JSR COUT
0805-   D0 FB             BNE L0802
0807-   60                RTS 
0808-   99 ?? ??          STA ????,Y

returns 2059, 11 (i.e. #x80a, 11)

(symbol-6502-disasm #(#xa9 65 #x20 #xed #xfd #xd0 #xfb #x60 #x99 #x01 #x02) 0 9 #x800 *6502-label-table*)

0800-   A9 41     START   LDA #$41
0802-   20 ED FD  L0802   JSR COUT
0805-   D0 FB             BNE L0802
0807-   60                RTS 
0808-   99 01 02          STA L0201,Y

returns 2059, 11 (i.e. #x80a, 11)
|#


(defun disasm-hex (code-array start-offset end-offset array-origin label-table
                                      &optional (output-stream t))
  "Display contents in CODE-ARRAY in the form of hexadecimal bytes"
  (do ((pc (+ array-origin start-offset))
       (offset start-offset))
      ((> offset end-offset) (values pc offset))
    (let* ((begin-skip (mod pc 8))
           (line-bytes (min (- (1+ end-offset) offset) (- 8 begin-skip))))
      (format output-stream "~4,'0X-   " (mod pc #x10000))
      (dotimes (i begin-skip)
        (format output-stream "   "))
      (dotimes (i line-bytes)
        (format output-stream "~2,'0X " (aref code-array (+ offset i))))
      (format output-stream "~%")
      (incf pc line-bytes)
      (incf offset line-bytes))))

(defconstant +apple-screen-ascii+
  #("^@" "^A" "^B"  "^C" "^D" "^E" "^F" "^G" "^H" "^I" "^J" "^K" "^L" "^M" "^N" "^O"
    "^P" "^Q" "^R"  "^S" "^T" "^U" "^V" "^W" "^X" "^Y" "^Z" "^[" "^\\" "^]" "^^" "^_"
    "^ " "^!" "^\"" "^#" "^$" "^%" "^&" "^'" "^(" "^)" "^*" "^+" "^," "^-" "^." "^/"
    "^0" "^1" "^2"  "^3" "^4" "^5" "^6" "^7" "^8" "^9" "^:" "^;" "^<" "^=" "^>" "^?"
    "*@" "*A" "*B"  "*C" "*D" "*E" "*F" "*G" "*H" "*I" "*J" "*K" "*L" "*M" "*N" "*O"
    "*P" "*Q" "*R"  "*S" "*T" "*U" "*V" "*W" "*X" "*Y" "*Z" "*[" "*\\" "*]" "*^" "*_"
    "* " "*!" "*\"" "*#" "*$" "*%" "*&" "*'" "*(" "*)" "**" "*+" "*," "*-" "*." "*/"
    "*0" "*1" "*2"  "*3" "*4" "*5" "*6" "*7" "*8" "*9" "*:" "*;" "*<" "*=" "*>" "*?"
    "  " " !" " \"" " #" " $" " %" " &" " '" " (" " )" " *" " +" " ," " -" " ." " /"
    " 0" " 1" " 2"  " 3" " 4" " 5" " 6" " 7" " 8" " 9" " :" " ;" " <" " =" " >" " ?"
    " @" " A" " B"  " C" " D" " E" " F" " G" " H" " I" " J" " K" " L" " M" " N" " O"
    " P" " Q" " R"  " S" " T" " U" " V" " W" " X" " Y" " Z" " [" " \\" " ]" " ^" " _"
    " `" " a" " b"  " c" " d" " e" " f" " g" " h" " i" " j" " k" " l" " m" " n" " o"
    " p" " q" " r"  " s" " t" " u" " v" " w" " x" " y" " z" " {" " |" " }" " ~" "DEL" ))
    

(defun disasm-apple-ascii (code-array start-offset end-offset array-origin label-table
                                      &optional (output-stream t))

  "Display data with ASCII text translation"
  (do ((pc (+ array-origin start-offset))
       (offset start-offset))
      ((> offset end-offset) (values pc offset))
    (let* ((begin-skip (mod pc 8))
           (line-bytes (min (- (1+ end-offset) offset) (- 8 begin-skip))))
      (format output-stream "~4,'0X-   " (mod pc #x10000))
      (dotimes (i begin-skip)
        (format output-stream "   "))
      (dotimes (i line-bytes)
        (format output-stream "~2,'0X " (aref code-array (+ offset i))))
      (dotimes (i (- 8 (+ begin-skip line-bytes)))
        (format output-stream "   "))
    ;;  (format output-stream "   \"")
      (format output-stream "   ")
      (dotimes (i line-bytes)
        (format output-stream "~3A " (aref +apple-screen-ascii+
                                           (mod (aref code-array (+ offset i)) #x80))))
      ;; (format output-stream "\"~%")
      (format output-stream "~%")
      (incf pc line-bytes)
      (incf offset line-bytes))))

#|
"Disassemble data from CODE-ARRAY, the first element of which corresponds to 
Apple II address ARRAY-ORIGIN. Output is directed to OUTPUT-STREAM.

Returns a label table.

LABELS should be a list of string descriptions of addresses, in the form

  '((\"COUT\" . #xfded)
    (\"MON\" . #xff59))

INSTRUCTIONS should contain a list of commands, such as

  '((:6502-code #x800 #x8ff)
    (:skip 2)
    (:screen-ascii #x900 #x905)
    (:6502-code #x911 #x2fff)
    (:hex #x3000 #x3100))

   where 

(:6502-code start end) 
requests a 6502 machine code description of the bytes at addresses
START to END, inclusive. 

(:hex start end)
requests an eight-column hex dump of the bytes at addresses START
to END, inclusive.
 
(:screen-ascii START END)
requests an eight-column hex dump with a annotation of Apple II
screen values.

(:skip n) 
requests N blank lines to be output."
|#

(defun disasm-apple (code-array array-origin instructions initial-labels
                                &optional (output-stream t))

  (let ((label-table (make-array  65536 :initial-element nil)))

    (labels ((extract-jump-branch-labels (start-offset end-offset)
               (let ((start-address (+ array-origin start-offset)))
                 (do ((pc start-address)
                      (offset start-offset))
                     ((> offset end-offset) nil)
                   (multiple-value-bind (inst next-offset next-address)
                                        (disassemble-6502-instruction code-array offset pc)
                     
                     (let ((addr (address-reference inst pc)))
                       (when addr
                         (find-or-intern-label addr label-table)))
                     (setf pc next-address)
                     (setf offset next-offset)))))

             (disasm-code-section (start-offset end-offset)
               (let ((start-address (+ array-origin start-offset)))
                 
                 (do ((pc start-address)
                      (offset start-offset))
                     ((> offset end-offset) (values pc offset))
                   (multiple-value-bind (inst next-offset next-address)
                                        (disassemble-6502-instruction code-array offset pc)
                     
                     (format output-stream "~A~%" (textline-6502 inst pc label-table))
                     
                     (setf pc next-address)
                     (setf offset next-offset)))))

             
             (execute-instruction (instruction)
               (ecase (car instruction)
                 (:skip (dotimes (i (second instruction))
                          (format output-stream "~%")))
                 (:6502-code (disasm-code-section (- (second instruction)
                                                     array-origin)
                                                  (- (third instruction)
                                                     array-origin)))
                 (:hex (disasm-hex code-array (- (second instruction)
                                                  array-origin)
                                               (- (third instruction)
                                                  array-origin)
                                               array-origin label-table
                                               output-stream))

                 (:screen-ascii (disasm-apple-ascii code-array (- (second instruction)
                                                                  array-origin)
                                                    (- (third instruction)
                                                       array-origin)
                                                    array-origin label-table
                                                    output-stream)))))
      
      ;;; initialize label-table
      
      (dolist (label-cons initial-labels)
        (setf (aref label-table (cdr label-cons))
              (car label-cons)))
      
      ;;; gather labels from code
      
      (mapcar (lambda (code-instruction)
                (extract-jump-branch-labels (- (second code-instruction)
                                               array-origin)
                                            (- (third code-instruction)
                                               array-origin)))
              (remove-if-not (lambda (instruction-list)
                               (eql (car instruction-list) :6502-code))
                             instructions))

      ;;; now, iterate through instructions

      (mapcar #'execute-instruction instructions))))
            
#|

;;; test cases

(disasm-apple #(#xa9 65 #x20 #xed #xfd #x60) #x800 
	      '((:6502-code #x800 #x805))
	      '(("COUT" . #xfded)
		("START" . #x800)))

0800-   A9 41     START   LDA #$41
0802-   20 ED FD          JSR COUT
0805-   60                RTS 

==> (2054) ; i.e. #x806

(disasm-apple #(#xa9 65 #x20 #xed #xfd #xd0 #xfb #x60 #x99)
	      #x800 
	      '((:6502-code #x800 #x809))
	      '(("COUT" . #xfded)
		("START" . #x800)))

0800-   A9 41     START   LDA #$41
0802-   20 ED FD  L0802   JSR COUT
0805-   D0 FB             BNE L0802
0807-   60                RTS 
0808-   99 ?? ??          STA ????,Y

==> (2059)

|#                                           
